#include <utils/random.h>
#include <seminix/kernel.h>
#include <seminix/start_kernel.h>
#include <seminix/mmap.h>
#include <seminix/tcb.h>
#include <seminix/cpu.h>
#include <seminix/param.h>
#include <seminix/rwsem.h>
#include <cap/vspace.h>
#include <cap/rlimit.h>
#include <cap/thread.h>
#include <cap/cap.h>
#include <asm/hwcap.h>
#include <asm/processor.h>
#include <asm/mmu_context.h>
#include <asm/elf.h>

/* The rootserver task; set once in rootserver_init() and used by the
 * ELF-loading helpers below. */
static struct tcb *root_tsk;

/* Both argv and envp for the rootserver share one config-defined limit. */
#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT
#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT

/* Default NULL-terminated argv/envp handed to the rootserver ("init").
 * The +2 leaves room for one extra entry plus the terminating NULL. */
static const char *argv_init[MAX_INIT_ARGS + 2] = { "init", NULL, };
static const char *envp_init[MAX_INIT_ENVS + 2] = { "HOME=/", "TERM=linux", NULL, };

/* Name of the rootserver image, parsed from the "rootserver=" early param. */
static char __initdata rootserver_name[100];

/*
 * Early-param handler for "rootserver=<name>".
 *
 * Copy with an explicit bound: the previous unbounded strcpy() could
 * overflow rootserver_name if the command-line value exceeded 99 bytes.
 * The buffer is always NUL-terminated afterwards.
 */
static int __init rootserver(char *str)
{
    strncpy(rootserver_name, str, sizeof(rootserver_name) - 1);
    rootserver_name[sizeof(rootserver_name) - 1] = '\0';
    return 0;
}
early_param("rootserver", rootserver);

/* Minimum alignment used when mapping ELF segments: whichever is larger,
 * the ELF execution page size or the platform page size. */
#if ELF_EXEC_PAGESIZE > UTILS_PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	UTILS_PAGE_SIZE
#endif

/* Round down / in-page offset / round up, all relative to ELF_MIN_ALIGN. */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

/* An address is bad if it falls outside the user task's address space. */
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

/* User-stack construction helpers: the stack grows down, so allocating
 * space moves the pointer toward lower addresses. */
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
/* Mask to 16-byte alignment after reserving `items` slots. */
#define STACK_ROUND(sp, items) \
    (((unsigned long) (sp - items)) &~ 15UL)
/* Reserve `len` bytes below sp; evaluates to the new (lower) sp. */
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

/* Count of fixed NEW_AUX_ENT() pairs emitted by create_elf_tables();
 * update it when the number of entries changes. */
#define AT_VECTOR_SIZE_BASE 20

#ifndef AT_VECTOR_SIZE_ARCH
#define AT_VECTOR_SIZE_ARCH 0
#endif
/* Total auxv words: two per entry, plus the AT_NULL terminator pair. */
#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))

/*
 * Locate and sanity-check the program header table of an in-memory ELF
 * image. The headers are not copied: the returned pointer aims directly
 * into the image at elf_ex + e_phoff.
 *
 * Returns NULL if the header entry size, count, or total table size
 * looks wrong for this kernel's struct elf_phdr.
 */
static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex)
{
    size_t size;
    loff_t pos = elf_ex->e_phoff;

    /*
     * If the size of this structure has changed, then punt, since
     * we will be doing the wrong thing.
     */
    if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
        return NULL;

    /* Sanity check the number of program headers... */
    if (elf_ex->e_phnum < 1 ||
        elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
        return NULL;

    /* ...and their total size. */
    size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
    if (size > ELF_MIN_ALIGN)
        return NULL;

    /* Byte-pointer arithmetic instead of round-tripping the pointer
     * through a signed loff_t integer. */
    return (struct elf_phdr *)((char *)elf_ex + pos);
}

/*
 * Map [addr, addr+len) into the rootserver's address space and copy the
 * corresponding bytes of the in-memory ELF image into it.
 *
 * There is no file backing or demand paging here: the mapping is created
 * eagerly via __mmap() and the contents are memcpy'd straight from
 * rootserver_start + offset.
 *
 * Returns 0 on success, -EINVAL on a bad offset; an __mmap() failure is
 * treated as fatal (BUG_ON).
 */
static unsigned long elf_mmap(void *rootserver_start,
    unsigned long addr, unsigned long len,
    unsigned long prot, unsigned long flag,
    unsigned long offset)
{
    int ret;

    /* Reject offsets that wrap once len is page-aligned... */
    if (unlikely(offset + PAGE_ALIGN(len) < offset))
        return -EINVAL;
    /* ...and offsets that are not page-aligned. */
    if (unlikely(offset_in_page(offset)))
        return -EINVAL;
    // TODO: temporarily force exec mappings writable as well, for debugging.
    ret = __mmap(root_tsk, root_tsk->mm, addr, len, prot | VM_WRITE, flag);
    BUG_ON(ret);

    /* NOTE(review): memcpy writes through the user virtual address, so this
     * presumably runs with the rootserver's mm already active — confirm. */
    memcpy((void *)addr, rootserver_start + offset, len);
    return 0;
}

/*
 * Map a single PT_LOAD segment described by eppnt at (roughly) addr.
 * Both the address and size are widened so the mapping is aligned to
 * ELF_MIN_ALIGN; the file offset is pulled back by the same in-page
 * offset so the segment bytes land at the right virtual address.
 *
 * Returns the page-aligned address (an empty segment maps nothing).
 */
static unsigned long elf_map(void *rootserver_start,
    unsigned long addr, const struct elf_phdr *eppnt,
    int prot, int type)
{
    unsigned long page_off = ELF_PAGEOFFSET(eppnt->p_vaddr);
    unsigned long map_size = ELF_PAGEALIGN(eppnt->p_memsz + page_off);
    unsigned long map_addr = ELF_PAGESTART(addr);
    unsigned long file_off = eppnt->p_offset - page_off;

    if (map_size == 0)
        return map_addr;

    return elf_mmap(rootserver_start, map_addr, map_size, prot, type, file_off);
}

/*
 * Eagerly map one THREAD_SIZE region just below STACK_TOP_MAX as the
 * rootserver's initial user stack, and return its top address.
 * Mapping failure is fatal.
 */
static unsigned long get_stack_top(void)
{
    unsigned long stack_base = STACK_TOP_MAX - THREAD_SIZE;
    int err;

    err = __mmap(root_tsk, root_tsk->mm, stack_base, THREAD_SIZE,
             SEMINIX_PROT_READ | SEMINIX_PROT_WRITE,
             SEMINIX_MAP_NOPAGEFAULT);
    BUG_ON(err);

    return STACK_TOP_MAX;
}

/*
 * Bookkeeping for building the rootserver's initial user stack.
 * `p` is the downward-moving copy cursor; the *_start/*_end fields
 * record where the argv and envp string areas ended up.
 */
struct arg_parse {
    unsigned long p, exec;              /* copy cursor; top-of-stack slot recorded as AT_EXECFN */
    int argc, envc;                     /* number of argv / envp strings */
    unsigned long arg_start, env_start; /* lowest address of each string area */
    unsigned long arg_end, env_end;     /* one past the last byte of each area */
};

/* Thin wrapper around a NULL-terminated argv/envp-style pointer array. */
struct user_arg_ptr {
    struct {
        const char __user *const __user *native;
    } ptr;
};

/* Fetch entry `nr` of the wrapped argv/envp pointer array. */
static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
{
    return argv.ptr.native[nr];
}

/*
 * count() counts the number of strings in array ARGV.
 */
static int count(struct user_arg_ptr argv, int max)
{
    int i = 0;

    if (argv.ptr.native != NULL) {
        for (;;) {
            const char __user *p = get_user_arg_ptr(argv, i);

            if (!p)
                break;

            if (IS_ERR(p))
                return -EFAULT;

            if (i >= max)
                return -E2BIG;
            ++i;
        }
    }
    return i;
}

static void copy_strings(int argc, struct user_arg_ptr argv,
            struct arg_parse *argsc)
{
    while (argc-- > 0) {
        const char __user *str;
        int len;

        str = get_user_arg_ptr(argv, argc);
        len = strlen(str);
        if (!len)
            panic("Failed to copy_strings\n");
        len += 1;
        argsc->p -= len;
        strcpy((char *)argsc->p, str);
    }
}

/*
 * Lay out the rootserver's argv and envp strings on the new stack,
 * starting just below `p`, and return the resulting bookkeeping.
 * Environment strings are copied first (ending up at higher addresses),
 * then the argument strings below them.
 */
static struct arg_parse prase_args(unsigned long p)
{
    struct user_arg_ptr argv = { .ptr.native = argv_init };
    struct user_arg_ptr envp = { .ptr.native = envp_init };
    struct arg_parse argsc;

    argsc.argc = count(argv, MAX_INIT_ARGS);
    assert(argsc.argc >= 0);
    argsc.envc = count(envp, MAX_INIT_ENVS);
    assert(argsc.envc >= 0);

    /* Reserve one pointer-sized slot at the very top of the stack. */
    argsc.p = p - sizeof(void *);

    /* To virt */
    argsc.exec = argsc.p;

    copy_strings(argsc.envc, envp, &argsc);
    argsc.env_start = argsc.p;

    copy_strings(argsc.argc, argv, &argsc);
    argsc.arg_start = argsc.p;

    return argsc;
}

/*
 * Build the initial user stack for the rootserver: argument and
 * environment strings, platform strings, 16 random bytes, the argv/envp
 * pointer arrays, argc, and the ELF auxiliary vector.
 *
 * Layout (top of stack downward): reserved slot, env strings, arg
 * strings, platform strings, random bytes, then (after 16-byte
 * rounding) argc, argv[], NULL, envp[], NULL, auxv.
 *
 * Returns the final stack pointer to hand to the new thread.
 */
static unsigned long create_elf_tables(struct elfhdr *exec, unsigned long load_addr)
{
    int i;
    unsigned long top = get_stack_top();
    unsigned long p = top - sizeof (void *);
    elf_addr_t __user *sp;
    elf_addr_t __user *u_platform;
    elf_addr_t __user *u_base_platform;
    elf_addr_t __user *u_rand_bytes;
    const char *k_platform = ELF_PLATFORM;
    const char *k_base_platform = ELF_BASE_PLATFORM;
    unsigned char k_rand_bytes[16];
    int items;
    elf_addr_t elf_info[AT_VECTOR_SIZE];
    int ei_index = 0;
    struct arg_parse argsc;

    p = arch_align_stack(p);

    /* Copy the argv/envp strings onto the stack first. */
    argsc = prase_args(p);
    p = argsc.p;

    /* Copy the AT_PLATFORM string, if the arch provides one. */
    u_platform = NULL;
    if (k_platform) {
        size_t len = strlen(k_platform) + 1;

        u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
        memcpy(u_platform, k_platform, len);
    }

    /*
     * If this architecture has a "base" platform capability
     * string, copy it to userspace.
     */
    u_base_platform = NULL;
    if (k_base_platform) {
        size_t len = strlen(k_base_platform) + 1;

        u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
        memcpy(u_base_platform, k_base_platform, len);
    }
    /*
     * Generate 16 random bytes for userspace PRNG seeding.
     */
    utils_srand(ktime_get());
    for (i = 0; i < 16; i++)
        k_rand_bytes[i] = (char)utils_random();
    u_rand_bytes = (elf_addr_t __user *)
               STACK_ALLOC(p, sizeof(k_rand_bytes));
    memcpy(u_rand_bytes, k_rand_bytes, sizeof (k_rand_bytes));

    /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
    do { \
        elf_info[ei_index++] = id; \
        elf_info[ei_index++] = val; \
    } while (0)

#ifdef ARCH_DLINFO
    /*
     * ARCH_DLINFO must come first so PPC can do its special alignment of
     * AUXV.
     * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
     * ARCH_DLINFO changes
     */
    ARCH_DLINFO;
#endif
    NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
    NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
    NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
    NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
    NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
    NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
    NEW_AUX_ENT(AT_BASE, load_addr);
    NEW_AUX_ENT(AT_FLAGS, 0);
    NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
    NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
    NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
    NEW_AUX_ENT(AT_EXECFN, argsc.exec);
    if (k_platform) {
        NEW_AUX_ENT(AT_PLATFORM,
                (elf_addr_t)(unsigned long)u_platform);
    }
    if (k_base_platform) {
        NEW_AUX_ENT(AT_BASE_PLATFORM,
                (elf_addr_t)(unsigned long)u_base_platform);
    }
#undef NEW_AUX_ENT
    /* AT_NULL is zero; clear the rest too */
    memset(&elf_info[ei_index], 0,
           sizeof elf_info - ei_index * sizeof elf_info[0]);

    /* And advance past the AT_NULL entry.  */
    ei_index += 2;

    sp = STACK_ADD(p, ei_index);

    /* argc + argv pointers + NULL + envp pointers + NULL. */
    items = (argsc.argc + 1) + (argsc.envc + 1) + 1;
    /* Round the final stack pointer down to 16 bytes. */
    argsc.p = STACK_ROUND(sp, items);
    sp = (void *)argsc.p;

    /* Write argc, then the argv pointers by walking the copied strings
     * upward from arg_start. */
    *sp++ = argsc.argc;
    p = argsc.arg_start;
    while (argsc.argc-- > 0) {
        size_t len;

        *sp++ = (elf_addr_t)p;
        len = strlen((void *)(p));
        assert(len);
        len++;
        p += len;
    }
    *sp++ = 0;
    argsc.arg_end = p;

    /* The env strings sit directly above the arg strings; continue the
     * same walk for the envp pointers. */
    assert(argsc.env_start == p);
    argsc.env_end = p;
    while (argsc.envc-- > 0) {
        size_t len;

        *sp++ = (elf_addr_t)p;
        len = strlen((void *)(p));
        assert(len);
        len++;
        p += len;
    }
    *sp++ = 0;
    argsc.env_end = p;

    /* Finally, the auxiliary vector goes right after the envp NULL. */
    memcpy(sp, elf_info, ei_index * sizeof (elf_addr_t));

    return argsc.p;
}

/*
 * Load the rootserver ELF image (already in memory at elf_header) into
 * root_tsk's address space, build its initial stack, and initialize its
 * thread state so it starts at the ELF entry point.
 *
 * NOTE(review): this function returns NULL on every path, including
 * success — callers apparently ignore the return value; confirm whether
 * the struct tcb * return type is vestigial.
 */
static struct tcb *rootserver_create(unsigned long elf_header)
{
    int i;
    struct pt_regs *regs;
    unsigned long sp;
    void *rootserver_start = (void *)elf_header;
    unsigned long load_addr = 0;
    int load_addr_set = 0;
    struct elf_phdr *elf_ppnt, *elf_phdata;
    struct {
        struct elfhdr elf_ex;
    } *loc;
    int __maybe_unused bss_prot = 0;
    unsigned long elf_bss, elf_brk;
    unsigned long start_code, end_code, start_data, end_data;
    unsigned long elf_entry;
    unsigned long error;
    struct copy_thread_param param = { 0 };

    loc = rootserver_start;

    /* Validate the ELF magic, type (ET_EXEC only), and architecture. */
    if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
        goto out;

    if (loc->elf_ex.e_type != ET_EXEC)
        goto out;
    if (!elf_check_arch(&loc->elf_ex))
        goto out;

    elf_phdata = load_elf_phdrs(&loc->elf_ex);
    if (!elf_phdata)
        goto out;

    elf_bss = 0;
    elf_brk = 0;

    start_code = ~0UL;
    end_code = 0;
    start_data = 0;
    end_data = 0;

    /* Now we do a little grungy work by mmapping the ELF image into
       the correct location in memory. */
    for(i = 0, elf_ppnt = elf_phdata;
        i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
        int elf_prot = 0, elf_flags = SEMINIX_MAP_NOPAGEFAULT;
        unsigned long k, vaddr;

        if (elf_ppnt->p_type != PT_LOAD)
            continue;

        /* Segments are expected in ascending address order, so the
         * running brk never trails the running bss. */
        assert(unlikely(elf_brk <= elf_bss));

        /* Translate ELF segment flags into mapping protections. */
        if (elf_ppnt->p_flags & PF_R)
            elf_prot |= SEMINIX_PROT_READ;
        if (elf_ppnt->p_flags & PF_W)
            elf_prot |= SEMINIX_PROT_WRITE;
        if (elf_ppnt->p_flags & PF_X)
            elf_prot |= SEMINIX_PROT_EXEC;

        vaddr = elf_ppnt->p_vaddr;

        error = elf_map(rootserver_start, vaddr, elf_ppnt, elf_prot, elf_flags);
        if (BAD_ADDR(error))
            panic("Not map elf\n");

        /* First PT_LOAD segment fixes the image load address. */
        if (!load_addr_set) {
            load_addr_set = 1;
            load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
        }

        k = elf_ppnt->p_vaddr;
        if (k < start_code)
            start_code = k;
        if (start_data < k)
            start_data = k;

        /*
         * Check to see if the section's size will overflow the
         * allowed task size. Note that p_filesz must always be
         * <= p_memsz so it is only necessary to check p_memsz.
         */
        if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
            elf_ppnt->p_memsz > TASK_SIZE ||
            TASK_SIZE - elf_ppnt->p_memsz < k) {
            goto out;
        }

        /* Track end-of-file-data (bss start) and end-of-memory (brk). */
        k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

        if (k > elf_bss)
            elf_bss = k;
        if ((elf_ppnt->p_flags & PF_X) && end_code < k)
            end_code = k;
        if (end_data < k)
            end_data = k;
        k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
        if (k > elf_brk) {
            bss_prot = elf_prot;
            elf_brk = k;
        }
    }

    elf_entry = loc->elf_ex.e_entry;
    if (BAD_ADDR(elf_entry))
        goto out;

    /* Build argv/envp/auxv on the new user stack. */
    sp = create_elf_tables(&loc->elf_ex, load_addr);

#if 0
    printk("start code %pa\n", &start_code);
    printk("end   code %pa\n", &end_code);
    printk("start data %pa\n", &start_data);
    printk("end   data %pa\n", &end_data);
    printk("start bss  %pa\n", &elf_bss);
    printk("end   bss  %pa\n", &elf_brk);
    printk("entry      %pa\n", &elf_entry);
    printk("bss prot   %pa\n", &bss_prot);
    printk("load  addr %pa\n", &load_addr);
    printk("start sp   %pa\n", &sp);
#endif
    /* Zero the bss region (memsz beyond filesz of the last segment). */
    memset((void *)elf_bss, 0, elf_brk - elf_bss);

    /* Map one page at 0x460000 — used as the IPC buffer; see
     * rootserver_init(), which looks this VMA up by the same address. */
    i = __mmap(root_tsk, root_tsk->mm, 0x460000, UTILS_PAGE_SIZE,
        SEMINIX_PROT_READ | SEMINIX_PROT_WRITE, SEMINIX_MAP_NOPAGEFAULT);
    BUG_ON(i);
    /* Initialize the thread context to start at the ELF entry point. */
    param.new_tsk = root_tsk;
    param.copy_tsk = current;
    param.pc = elf_entry;
    param.stack = sp;
    copy_thread(&param);
    regs = task_pt_regs(root_tsk);
    start_thread(regs, elf_entry, sp);

out:
    return NULL;
}

/*
 * Create and boot the rootserver task: allocate its TCB, cnode, and
 * vspace capabilities from the root rlimit, wire them together, load
 * the ELF image from the initrd, and hand the task to the scheduler.
 *
 * Returns the rootserver's tcb.
 */
struct tcb *rootserver_init(void)
{
    int index;
    cap_t *tsk_cap, *cnode_cap, *vspace_cap;
    struct tcb *tsk;
    seminix_object_t tcb_obj = { .type = seminix_TcbObject, };
    seminix_object_t cnode_obj = {
        .type = seminix_CnodeObject,
        .cnode.count = 1024,
    };
    seminix_object_t vspace_obj = { .type = seminix_VspaceObject, };
    struct vm_area_struct *vma;

    /* Allocate the TCB and cnode objects; failures here are fatal. */
    tsk_cap = create_rlimit_obj(&root_cap_rlimit, &tcb_obj, seminix_all_rights);
    BUG_ON(IS_ERR(tsk_cap));
    cnode_cap = create_rlimit_obj(&root_cap_rlimit, &cnode_obj, seminix_all_rights);
    BUG_ON(IS_ERR(cnode_cap));
    tsk = CAP_THREAD_PTR(tsk_cap)->task;
    root_tsk = tsk;

    /* The cnode's first slot holds the cnode capability itself. */
    index = cnode_get_unused_slot(CAP_CNODE_PTR(cnode_cap));
    BUG_ON(index < 0);
    cnode_cap_insert_slot(CAP_CNODE_PTR(cnode_cap), index, cnode_cap);
    tsk->cap_cnode = CAP_CNODE_PTR(cnode_cap);

    /* Give the rootserver a reference to the root rlimit. */
    index = cnode_get_unused_slot(CAP_CNODE_PTR(cnode_cap));
    BUG_ON(index < 0);
    cnode_cap_insert_slot(CAP_CNODE_PTR(cnode_cap), index, CAP_REF(&root_cap_rlimit));
    tsk->cap_rlimit = &root_cap_rlimit;

    /* ...and a capability to its own TCB. */
    index = cnode_get_unused_slot(CAP_CNODE_PTR(cnode_cap));
    BUG_ON(index < 0);
    cnode_cap_insert_slot(CAP_CNODE_PTR(cnode_cap), index, tsk_cap);

    /* Create the rootserver's address space and attach it to the task. */
    vspace_cap = create_rlimit_obj(&root_cap_rlimit, &vspace_obj, seminix_all_rights);
    BUG_ON(IS_ERR(vspace_cap));

    index = cnode_get_unused_slot(CAP_CNODE_PTR(cnode_cap));
    BUG_ON(index < 0);
    cnode_cap_insert_slot(CAP_CNODE_PTR(cnode_cap), index, vspace_cap);
    tsk->mm = CAP_VSPACE_PTR(vspace_cap)->mm;
    mm_set_task(CAP_VSPACE_PTR(vspace_cap)->mm, tsk);

    /* Switch to the new mm so the ELF loader can write through user
     * virtual addresses directly. */
    activate_mm(&init_mm, tsk->mm);

    /* The rootserver ELF image is the initrd. */
    rootserver_create((unsigned long)phys_to_virt(phys_initrd_start));
    strcpy(tsk->comm, "root");
    tsk->prio = MIN_RT_PRIO;
    tsk->mcprio = MIN_RT_PRIO;
    tsk->policy = SEMINIX_SCHED_FIFO;
    /* rootserver_create() mapped one page at 0x460000 for the IPC
     * buffer; look it up and record both kernel and user views. */
    vma = find_exact_vma(tsk->mm, 0x460000, 0x461000);
    tsk->ipc_buffer = page_to_virt(vma->pages[0]);
    tsk->user_ipc_buffer = 0x460000;
    sched_new(tsk, TASK_NEW);
    sched_set_affinity(tsk, boot_cpu_id());

    return tsk;
}
